Clustering filtered spatial data

4/4/24

Load data and libraries

##################
# LOAD LIBRARIES #
##################
library(tidyverse)
library(Seurat)
library(tidyseurat)
library(cowplot)
library(patchwork)

source("../bin/spatial_visualization.R")
source("../bin/plotting_functions.R")

#########
# PATHS #
#########
input_dir <- "../results/02_integrate_st_data/"
result_dir <- "../results/03_clustering_st_data/"
if( isFALSE(dir.exists(result_dir)) ) { dir.create(result_dir,recursive = TRUE) }

#############
# LODA DATA #
#############
DATA <- readRDS(paste0(input_dir,"seuratObj_integrated.RDS"))

#################
# COLOUR PALLET #
#################
clus_1.5 <- c("#F8766D","#7CAE00","#CD9600","#00A9FF","#984EA3","#C77CFF","#00BFC4","#FF7F00",
              "#FFFF33","#E41A1C","#FF61CC","#4DAF4A","#A65628","#F781BF","#999999")
clus_1 <- c( "#CD9600","#7CAE00","#e0e067","#00A9FF","#377EB8","#984EA3","#E41A1C","#C77CFF",
             "#00BFC4","#FF7F00","#FFFF33","#CD9600","#4DAF4A","#A65628","#F781BF","#999999")

Clustering

##################################
# EVALUATE CLUSTERING RESOLUTION #
##################################
DATA <- FindNeighbors(DATA, reduction = "harmony", dims = 1:30, k.param = 15, prune.SNN = 1/15) 

# Clustering with louvain (algorithm 1) or leiden (algorithm 4)
for (res in c(0.1, 0.5, 1, 1.5, 2)) {
    DATA <- FindClusters(DATA, resolution = res, algorithm = 1)
}
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 6598
Number of edges: 299026

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.9375
Number of communities: 2
Elapsed time: 0 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 6598
Number of edges: 299026

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.8316
Number of communities: 7
Elapsed time: 0 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 6598
Number of edges: 299026

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.7608
Number of communities: 11
Elapsed time: 0 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 6598
Number of edges: 299026

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.7115
Number of communities: 13
Elapsed time: 1 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 6598
Number of edges: 299026

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.6669
Number of communities: 16
Elapsed time: 0 seconds
# each time you run clustering, the data is stored in meta data columns:
# seurat_clusters - lastest results only RNA_snn_res.XX - for each different
# resolution you test.

UMAP of cluster resolutions

# dev.new(width=6.6929133858, height=3.3, noRStudioGD = TRUE)
res <- c("RNA_snn_res.1", "RNA_snn_res.1.5")
p <- map2(res, list(clus_1,clus_1.5),
          ~plot_clusters.fun(DATA, cluster=.x, txt_size = 10, dot_size = 0.2,
                             color = .y, red = "umap_harmony") + xlab("UMAP 1") + ylab("UMAP 2")) 
plot_grid(ncol = 2, 
          plotlist = p)

Cluster resolutions on tissue

# dev.new(width=6.6929133858, height=14, noRStudioGD = TRUE)
plots <- DATA %>%
  mutate(group = orig.ident) %>%
  nest(., data = -group) %>%
  mutate( "res_1" = pmap(., 
    ~plot_spatial.fun(..2, sampleid=..1, geneid="RNA_snn_res.1", 
                      point_size = 0.8, zoom="zoom", colors = clus_1))) %>%
  mutate( "res_2" = pmap(., 
    ~plot_spatial.fun(..2, sampleid=..1, geneid="RNA_snn_res.1.5",
                      point_size = 0.8, zoom="zoom", colors = clus_1.5)))

legend_1 <- get_legend(plots$res_1[[2]] + theme(legend.position="right"))
legend_2 <- get_legend(plots$res_2[[1]] + theme(legend.position="right"))
legend <- plot_grid( legend_1, legend_2, ncol = 1)
combined <- wrap_plots(plotlist=c(plots$res_1, plots$res_2), nrow = 8, byrow = F) & theme(legend.position="none")
combined <- plot_grid( combined, legend, ncol = 2, rel_widths = c(1, .3)) 
combined

Set cluster resolution

DATA <- DATA %>%
  rename(Clusters="RNA_snn_res.1") %>%
  SetIdent(., value = "Clusters") %>%
  select(-any_of(contains(c("RNA_snn_res","seurat_clusters"))))
# dev.new(width=3.3, height=3.3, noRStudioGD = TRUE)
p[[1]] + ggtitle("")

Spot distribution by clusters

DATA_sub <- DATA %>%
  mutate(gr = .$groups) %>%
  mutate(ID = .$orig.ident) %>%
  nest(., data=-c(gr, orig.ident)) %>%
  mutate(epi =  map(data, ~filter(.x, !(sp_annot == "SubMuc"))),
         subMuc =  map(data, ~filter(.x, sp_annot == "SubMuc"))) %>%
  mutate(across(c("epi", "subMuc"), ~map(., ~table(.x$Clusters)), .names = "{.col}_n_before")) %>%
  mutate(across(contains("_n_"), ~set_names(.x, paste0(.data[["gr"]],"_",.data[["orig.ident"]]))))
      
table(DATA$Clusters)

bind_cols(DATA_sub$epi_n_before, "Clus" = paste0("**",names(table(DATA$Clusters)),"**")) %>%
  rowwise() %>% 
  mutate(DMPA_sum = sum(c_across(starts_with("DMPA_"))),
         ctrl_sum = sum(c_across(starts_with("ctrl_")))) %>%
  select(sort(colnames(.)[1:8]), everything()) %>%
  knitr::kable(., caption = "Distribution of epithelial spots per cluster per subject")

bind_cols(DATA_sub$subMuc_n_before, "Clus" = paste0("**",names(table(DATA$Clusters)),"**")) %>%
  rowwise() %>% 
  mutate(DMPA_sum = sum(c_across(starts_with("DMPA_"))),
         ctrl_sum = sum(c_across(starts_with("ctrl_")))) %>%
  select(sort(colnames(.)[1:8]), everything()) %>%
  knitr::kable(., caption = "Distribution of submucosal spots per cluster per subject")
   0    1    2    3    4    5    6    7    8    9   10 
1148  884  747  656  646  558  511  507  471  432   38 
ctrl_P031 ctrl_P080 ctrl_P105 ctrl_P118 DMPA_P097 DMPA_P107 DMPA_P108 DMPA_P114 Clus DMPA_sum ctrl_sum
0 0 1 0 0 1 0 0 0 1 1
1 0 0 0 0 1 0 1 1 2 1
0 0 0 0 0 0 0 0 2 0 0
3 0 3 0 0 0 0 1 3 1 6
0 0 0 0 0 0 0 0 4 0 0
79 67 47 77 67 31 37 67 5 202 270
60 66 43 69 30 76 29 112 6 247 238
89 67 34 77 49 51 38 61 7 199 267
12 9 1 6 4 7 3 1 8 15 28
92 69 33 53 51 51 26 56 9 184 247
0 0 1 0 0 0 0 1 10 1 1

Distribution of epithelial spots per cluster per subject

ctrl_P031 ctrl_P080 ctrl_P105 ctrl_P118 DMPA_P097 DMPA_P107 DMPA_P108 DMPA_P114 Clus DMPA_sum ctrl_sum
150 208 124 201 227 20 34 182 0 463 683
131 192 118 121 182 10 36 91 1 319 562
37 155 72 91 231 35 9 117 2 392 355
46 100 76 130 141 25 43 88 3 297 352
24 73 76 192 123 37 21 100 4 281 365
1 5 8 20 41 1 1 9 5 52 34
0 3 0 0 4 19 0 0 6 23 3
1 33 0 2 4 0 0 1 7 5 36
24 46 59 85 102 14 20 78 8 214 214
0 0 0 0 1 0 0 0 9 1 0
1 1 0 0 5 29 0 0 10 34 2

Distribution of submucosal spots per cluster per subject

Plot final clusters on tissue section:

# Horizontal (aspect = width/height)
# dev.new(width=6.6929133858, height=3.5, noRStudioGD = TRUE)
############################
# PLOT FACET WRAP CLUSTERS #
############################
(p <- plot_st_meta.fun( DATA,
        feat =  "Clusters",
        zoom = "zoom",
        colors = clus_1,
        alpha = .9,
        #annot_col = "#dbd9d9",
        annot_line = .1,
        img_alpha = 0,
        point_size = .5))

###################
# ADD ANNOTATION #
##################
ord1 <- c("Sup_1","Sup_2","Basal_2","Basal_1","8","3","4","0","1","2","10")
ord2 <- c("6", "9", "7", "5","8","3","4","0","1","2","10")
epi_layers <- set_names(ord1, ord2)

DATA <- DATA %>%
mutate(layers = factor(epi_layers[as.character(.$Clusters)], levels = ord1))

The log2 count can work as a proxy for cell density. Here we see the distribution of counts per cluster

clus_col <- c("#E41A1C","#FF7F00", "#C77CFF","#984EA3","#00BFC4", "#00A9FF","#377EB8","#CD9600","#7CAE00", "#e0e067","#FFFF33","#FF61CC","#F781BF", "#999999")
clus_lvl <- c("6", "9", "7", "5","8","3","4","0","1","2","10")

DATA %>%
  mutate("Count (log2)" = log2(.$nCount_RNA)) %>%
  mutate(Clusters = factor(.$Clusters, levels = clus_lvl)) %>%
  
  ggplot(., aes(x=Clusters, y=`Count (log2)`, fill=Clusters, color=Clusters)) + 
  geom_violin() + theme_light() +
  scale_fill_manual(values = clus_col, aesthetics = c("fill","colour"))

Save seurat object

##################################
# SAVE INTERMEDIATE SEURAT OJECT #
##################################
saveRDS(DATA, paste0(result_dir,"seuratObj_clustered.RDS"))
# DATA <- readRDS(paste0(result_dir,"seuratObj_clustered.RDS"))

Session info

sessionInfo()
R version 4.3.3 (2024-02-29)
Platform: x86_64-apple-darwin20 (64-bit)
Running under: macOS Monterey 12.7.3

Matrix products: default
BLAS:   /Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/lib/libRblas.0.dylib 
LAPACK: /Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/lib/libRlapack.dylib;  LAPACK version 3.11.0

locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8

time zone: Europe/Stockholm
tzcode source: internal

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
 [1] RColorBrewer_1.1-3 patchwork_1.2.0    cowplot_1.1.3      tidyseurat_0.8.0  
 [5] ttservice_0.4.0    SeuratObject_5.0.1 Seurat_4.3.0       lubridate_1.9.3   
 [9] forcats_1.0.0      stringr_1.5.1      dplyr_1.1.4        purrr_1.0.2       
[13] readr_2.1.5        tidyr_1.3.1        tibble_3.2.1       ggplot2_3.5.0     
[17] tidyverse_2.0.0   

loaded via a namespace (and not attached):
  [1] deldir_2.0-2           pbapply_1.7-2          gridExtra_2.3         
  [4] rlang_1.1.3            magrittr_2.0.3         RcppAnnoy_0.0.22      
  [7] spatstat.geom_3.2-8    matrixStats_1.2.0      ggridges_0.5.6        
 [10] compiler_4.3.3         png_0.1-8              vctrs_0.6.5           
 [13] reshape2_1.4.4         pkgconfig_2.0.3        fastmap_1.1.1         
 [16] ellipsis_0.3.2         labeling_0.4.3         utf8_1.2.4            
 [19] promises_1.2.1         rmarkdown_2.25         tzdb_0.4.0            
 [22] xfun_0.42              jsonlite_1.8.8         goftest_1.2-3         
 [25] later_1.3.2            spatstat.utils_3.0-4   irlba_2.3.5.1         
 [28] parallel_4.3.3         cluster_2.1.6          R6_2.5.1              
 [31] ica_1.0-3              spatstat.data_3.0-4    stringi_1.8.3         
 [34] reticulate_1.35.0      parallelly_1.37.0      lmtest_0.9-40         
 [37] scattermore_1.2        Rcpp_1.0.12            knitr_1.45            
 [40] tensor_1.5             future.apply_1.11.1    zoo_1.8-12            
 [43] sctransform_0.4.1      httpuv_1.6.14          Matrix_1.6-5          
 [46] splines_4.3.3          igraph_2.0.2           timechange_0.3.0      
 [49] tidyselect_1.2.0       abind_1.4-5            rstudioapi_0.15.0     
 [52] yaml_2.3.8             spatstat.random_3.2-2  spatstat.explore_3.2-6
 [55] codetools_0.2-19       miniUI_0.1.1.1         listenv_0.9.1         
 [58] lattice_0.22-5         plyr_1.8.9             shiny_1.8.0           
 [61] withr_3.0.0            ROCR_1.0-11            evaluate_0.23         
 [64] Rtsne_0.17             future_1.33.1          survival_3.5-8        
 [67] polyclip_1.10-6        fitdistrplus_1.1-11    pillar_1.9.0          
 [70] KernSmooth_2.23-22     plotly_4.10.4          generics_0.1.3        
 [73] sp_2.1-3               hms_1.1.3              munsell_0.5.0         
 [76] scales_1.3.0           globals_0.16.2         xtable_1.8-4          
 [79] glue_1.7.0             lazyeval_0.2.2         tools_4.3.3           
 [82] data.table_1.15.0      RANN_2.6.1             fs_1.6.3              
 [85] leiden_0.4.3.1         dotCall64_1.1-1        grid_4.3.3            
 [88] colorspace_2.1-0       nlme_3.1-164           cli_3.6.2             
 [91] spatstat.sparse_3.0-3  spam_2.10-0            fansi_1.0.6           
 [94] viridisLite_0.4.2      uwot_0.1.16            gtable_0.3.4          
 [97] digest_0.6.34          progressr_0.14.0       ggrepel_0.9.5         
[100] farver_2.1.1           htmlwidgets_1.6.4      htmltools_0.5.7       
[103] lifecycle_1.0.4        httr_1.4.7             mime_0.12             
[106] MASS_7.3-60.0.1